In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import gym
from gym.envs.registration import register
About the environment
The agent controls the movement of a character in a grid world. Some tiles of the grid are walkable, and others lead to the agent falling into the water. Additionally, the movement direction of the agent is uncertain and only partially depends on the chosen direction. The agent is rewarded for finding a walkable path to a goal tile.
In [2]:
# Create the classic (slippery) 4x4 FrozenLake environment.
# NOTE(review): assumes gym<=0.25 API — reset() returns just the initial state
# (newer gym/gymnasium returns (state, info)); confirm installed gym version.
fl_env = gym.make('FrozenLake-v0')
# Reset the state
state = fl_env.reset()
# Render the environment
fl_env.render()
Each tile can take one of the following values: {S, F, H, G},
which refer to:
SFFF (S: starting point, safe)
FHFH (F: frozen surface, safe)
FFFH (H: hole, fall to your doom)
HFFG (G: goal, where the frisbee is located)
When interacting with the environment, the agent can perform 4 actions, which map as follows: 0 — left, 1 — down, 2 — right, 3 — up.
FrozenLake-v0 defines "solving" as getting average reward of 0.78 over 100 consecutive trials.
We will also define a second version of the same environment, but with the is_slippery=False
parameter, which makes the transitions deterministic.
In [3]:
# Register a deterministic variant of FrozenLake: same 4x4 map, but
# is_slippery=False removes the random transition noise.
register(
    id='FrozenLakeNotSlippery-v0',
    entry_point='gym.envs.toy_text:FrozenLakeEnv',
    kwargs={'map_name': '4x4', 'is_slippery': False},
    max_episode_steps=100,  # episode is truncated after 100 steps
    reward_threshold=0.78,  # optimum = .8196
)
fl_ns_env = gym.make('FrozenLakeNotSlippery-v0')
# Reset the state
state = fl_ns_env.reset()
# Render the environment
fl_ns_env.render()
In [5]:
# Import PyALCS code from local path
# Import PyALCS code from local path (repository root three levels up)
import sys, os
sys.path.append(os.path.abspath('../../..'))
from lcs.agents import EnvironmentAdapter
from lcs.agents.acs2 import ACS2, Configuration
# Enable automatic module reload
%load_ext autoreload
%autoreload 2
# One condition attribute per tile: the state is one-hot encoded (see adapter below).
CLASSIFIER_LENGTH = 16 # Because we are operating in 4x4 grid
POSSIBLE_ACTIONS = fl_env.action_space.n # 4
In [12]:
class FrozenLakeAdapter(EnvironmentAdapter):
    """Translates FrozenLake states into ACS2 genotype strings.

    The environment reports the agent's position as a single integer
    (0..15 on the 4x4 map); ACS2 works on fixed-length strings, so the
    position is one-hot encoded: 'X' at the agent's tile, '0' elsewhere.
    """

    @staticmethod
    def to_genotype(phenotype, length=None):
        """Encode the integer state ``phenotype`` as a one-hot string.

        :param phenotype: integer tile index of the agent
        :param length: genotype length; defaults to the notebook-level
            CLASSIFIER_LENGTH (parameter added for reuse on other map
            sizes — backward compatible)
        :return: string of ``length`` characters with 'X' marking the agent
        """
        if length is None:
            length = CLASSIFIER_LENGTH
        # Idiomatic list repetition instead of a comprehension with an
        # unused loop index.
        genotype = ['0'] * length
        genotype[phenotype] = 'X'
        return ''.join(genotype)
X
corresponds to the current agent position. For example, state 4 is encoded as follows:
In [13]:
FrozenLakeAdapter().to_genotype(4)
Out[13]:
In [16]:
from lcs.metrics import population_metrics
# State 15 is the goal tile (bottom-right corner of the 4x4 map), so ending
# a trial there means the agent found the reward.
def fl_metrics(pop, env):
    """Collect per-trial metrics: a goal-reached flag plus the standard
    population metrics from lcs.metrics.population_metrics.

    :param pop: current classifier population
    :param env: gym environment wrapper (raw state read via env.env.s)
    :return: dict with 'found_reward' and the population metrics
    """
    result = {'found_reward': env.env.s == 15}
    result.update(population_metrics(pop, env))
    return result
In [17]:
def print_performance(population, metrics, total_trials=None):
    """Print a summary of a learned classifier population.

    Sorts ``population`` in place by descending fitness, then prints the
    population size, the number of reliable classifiers, the success
    percentage and the top-10 classifiers.

    :param population: list of classifiers (sorted in place as a side effect)
    :param metrics: per-trial metric dicts carrying a 'found_reward' flag
    :param total_trials: trial count used for the success percentage;
        defaults to the notebook-level EXPLOIT_TRIALS (backward compatible)
    """
    if total_trials is None:
        total_trials = EXPLOIT_TRIALS
    population.sort(key=lambda cl: -cl.fitness)
    population_count = len(population)
    reliable_count = len([cl for cl in population if cl.is_reliable()])
    successful_trials = sum(m['found_reward'] for m in metrics)
    print("Number of classifiers: {}".format(population_count))
    print("Number of reliable classifiers: {}".format(reliable_count))
    # Fixed typo in the original message ("successul" -> "successful").
    print("Percentage of successful trials: {:.2f}%".format(successful_trials / total_trials * 100))
    print("\nTop 10 classifiers:")
    for cl in population[:10]:
        print("{!r} \tq: {:.2f} \tr: {:.2f} \tir: {:.2f} \texp: {}".format(cl, cl.q, cl.r, cl.ir, cl.exp))
In [18]:
def plot_success_trials(metrics, ax=None):
    """Draw the per-trial success indicator (did the agent reach the goal).

    :param metrics: iterable of metric dicts with 'trial' and 'found_reward'
    :param ax: matplotlib axes to draw on; defaults to the current axes
    """
    if ax is None:
        ax = plt.gca()
    xs, ys = [], []
    for record in metrics:
        xs.append(record['trial'])
        ys.append(record['found_reward'])
    ax.plot(xs, ys)
    ax.set_title("Successful Trials")
    ax.set_xlabel("Trial")
    ax.set_ylabel("Agent found reward")
In [19]:
def plot_population(metrics, ax=None):
    """Plot total ('all') vs reliable macroclassifier counts over trials.

    :param metrics: iterable of metric dicts with 'trial', 'numerosity'
        and 'reliable' entries
    :param ax: matplotlib axes to draw on; defaults to the current axes
    """
    if ax is None:
        ax = plt.gca()
    xs = [record['trial'] for record in metrics]
    # (metric key, line colour, legend label) — drawn in this order
    series = [('numerosity', 'b', 'all'),
              ('reliable', 'r', 'reliable')]
    for key, colour, label in series:
        ax.plot(xs, [record[key] for record in metrics], colour, label=label)
    ax.set_title("Population size")
    ax.set_xlabel("Trial")
    ax.set_ylabel("Number of macroclassifiers")
    ax.legend(loc='best')
In [20]:
def plot_performance(metrics):
    """Render the performance dashboard: success trials + population size.

    :param metrics: per-trial metric dicts consumed by both sub-plots
    """
    plt.figure(figsize=(13, 10), dpi=100)
    plt.suptitle('Performance Visualization')
    # Two panels side by side in a 2x2 grid (positions 221 and 222).
    panels = [(221, plot_success_trials), (222, plot_population)]
    for position, draw in panels:
        draw(metrics, plt.subplot(position))
    plt.show()
In [21]:
# ACS2 hyper-parameters for FrozenLake.
cfg = Configuration(
    classifier_length=CLASSIFIER_LENGTH,
    number_of_possible_actions=POSSIBLE_ACTIONS,
    environment_adapter=FrozenLakeAdapter(),
    metrics_trial_frequency=1,  # collect metrics every trial
    user_metrics_collector_fcn=fl_metrics,
    theta_i=0.3,  # NOTE(review): presumably the inadequacy threshold — confirm in PyALCS docs
    epsilon=0.7)  # NOTE(review): presumably epsilon-greedy exploration rate — confirm
print(cfg)
In [22]:
EXPLORE_TRIALS = 2000
EXPLOIT_TRIALS = 100

def perform_experiment(cfg, env):
    """Run an explore phase followed by an exploit phase on ``env``.

    :param cfg: ACS2 Configuration
    :param env: gym environment to learn on
    :return: ((explore_population, explore_metrics),
              (exploit_population, exploit_metrics))
    """
    # Phase 1: learn from scratch by exploration.
    explorer = ACS2(cfg)
    explore_pop, explore_metrics = explorer.explore(env, EXPLORE_TRIALS)

    # Phase 2: exploit — fresh agent seeded with the learned population.
    exploiter = ACS2(cfg, population=explore_pop)
    exploit_pop, exploit_metrics = exploiter.exploit(env, EXPLOIT_TRIALS)

    return (explore_pop, explore_metrics), (exploit_pop, exploit_metrics)
In [23]:
%%time
# Full explore/exploit run on the original (slippery) environment.
explore_results, exploit_results = perform_experiment(cfg, fl_env)
Learn some behaviour during exploration phase
In [24]:
# exploration — population summary for the slippery environment
print_performance(explore_results[0], explore_results[1])
In [25]:
# exploration-phase plots (slippery environment)
plot_performance(explore_results[1])
Metrics from exploitation
In [26]:
# exploitation — summary after greedy trials (slippery environment)
print_performance(exploit_results[0], exploit_results[1])
In [27]:
%%time
# Repeat the whole experiment on the deterministic (no-slip) variant.
explore_results_2, exploit_results_2 = perform_experiment(cfg, fl_ns_env)
In [28]:
# exploration — population summary for the deterministic environment
print_performance(explore_results_2[0], explore_results_2[1])
In [29]:
# exploration-phase plots (deterministic environment)
plot_performance(explore_results_2[1])
In [30]:
# exploitation — summary after greedy trials (deterministic environment)
print_performance(exploit_results_2[0], exploit_results_2[1])
In [31]:
# NOTE(review): this cell re-defines plot_population byte-for-byte identically
# to the earlier definition — the later `def` silently shadows the earlier one.
# Redundant; consider deleting this cell.
def plot_population(metrics, ax=None):
    if ax is None:
        ax = plt.gca()
    trials = [m['trial'] for m in metrics]
    population_size = [m['numerosity'] for m in metrics]
    reliable_size = [m['reliable'] for m in metrics]
    ax.plot(trials, population_size, 'b', label='all')
    ax.plot(trials, reliable_size, 'r', label='reliable')
    ax.set_title("Population size")
    ax.set_xlabel("Trial")
    ax.set_ylabel("Number of macroclassifiers")
    ax.legend(loc='best')
In [32]:
# Compare population growth (numerosity) between the two environments.
original = explore_results[1]    # slippery FrozenLake-v0
modified = explore_results_2[1]  # deterministic (is_slippery=False) variant
ax = plt.gca()
trials = [m['trial'] for m in original]
original_numerosity = [m['numerosity'] for m in original]
modified_numerosity = [m['numerosity'] for m in modified]
ax.plot(trials, original_numerosity, 'r')
ax.text(1000, 350, "Original environment", color='r')
# NOTE(review): reuses the 'trials' x-axis from the slippery run — assumes
# both runs used the same EXPLORE_TRIALS and metrics_trial_frequency.
ax.plot(trials, modified_numerosity, 'b')
ax.text(1000, 40, 'No-slippery setting', color='b')
ax.set_title('Classifier numerosity in FrozenLake environment')
ax.set_xlabel('Trial')
ax.set_ylabel('Number of macroclassifiers')
plt.show()